# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

# Autogenerated By   : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/glmPredict.dml

from typing import Dict, Iterable

from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES


def glmPredict(X: Matrix,
               B: Matrix,
               **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Score a new dataset with the fitted parameters of a GLM-type regression.

    Only ``X`` and ``B`` are positional; every other parameter listed below is
    optional and is forwarded to the builtin through ``**kwargs`` under the
    name given to it by the caller.

    Additional statistics are printed one per line, in the following CSV format:

    .. code-block::

       CSV format: NAME,[COLUMN],[SCALED],VALUE
       ---
       NAME   is the string identifier for the statistic, see the table below.
       COLUMN is an optional integer value that specifies the Y-column for per-column statistics;
              note that a Binomial/Multinomial one-column Y input is converted into multi-column.
       SCALED is an optional Boolean value (TRUE or FALSE) that tells us whether or not the input
              dispersion parameter (disp) scaling has been applied to this statistic.
       VALUE  is the value of the statistic.
       ---

    The statistics that may be reported are:

    .. code-block::

       NAME                  COLUMN  SCALED  MEANING
       ---------------------------------------------------------------------------------------------
       LOGLHOOD_Z                      +     Log-Likelihood Z-score (in st.dev's from mean)
       LOGLHOOD_Z_PVAL                 +     Log-Likelihood Z-score p-value
       PEARSON_X2                      +     Pearson residual X^2 statistic
       PEARSON_X2_BY_DF                +     Pearson X^2 divided by degrees of freedom
       PEARSON_X2_PVAL                 +     Pearson X^2 p-value
       DEVIANCE_G2                     +     Deviance from saturated model G^2 statistic
       DEVIANCE_G2_BY_DF               +     Deviance G^2 divided by degrees of freedom
       DEVIANCE_G2_PVAL                +     Deviance G^2 p-value
       AVG_TOT_Y               +             Average of Y column for a single response value
       STDEV_TOT_Y             +             St.Dev. of Y column for a single response value
       AVG_RES_Y               +             Average of column residual, i.e. of Y - mean(Y|X)
       STDEV_RES_Y             +             St.Dev. of column residual, i.e. of Y - mean(Y|X)
       PRED_STDEV_RES          +       +     Model-predicted St.Dev. of column residual
       R2                      +             R^2 of Y column residual with bias included
       ADJUSTED_R2             +             Adjusted R^2 of Y column residual with bias included
       R2_NOBIAS               +             R^2 of Y column residual with bias subtracted
       ADJUSTED_R2_NOBIAS      +             Adjusted R^2 of Y column residual with bias subtracted
       ---------------------------------------------------------------------------------------------

    :param X: Matrix X of records (feature vectors)
    :param B: GLM regression parameters (the betas), with dimensions
        ncol(X)   x k: do not add intercept
        ncol(X)+1 x k: add intercept as given by the last B-row
        if k > 1, use only B[, 1] unless it is Multinomial Logit (dfam=3)
    :param ytest: Response matrix Y, with the following dimensions:
        nrow(X) x 1  : for all distributions (dfam=1 or 2 or 3)
        nrow(X) x 2  : for Binomial (dfam=2) given by (#pos, #neg) counts
        nrow(X) x k+1: for Multinomial (dfam=3) given by category counts
    :param dfam: GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit
    :param vpow: Power for Variance defined as (mean)^power (ignored if dfam != 1):
        0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
    :param link: Link function code: 0 = canonical (depends on distribution), 1 = Power,
        2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit; ignored if Multinomial
    :param lpow: Power for Link function defined as (mean)^power (ignored if link != 1):
        -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
    :param disp: Dispersion value, when available
    :param verbose: Print statistics to stdout
    :return: Matrix M of predicted means/probabilities:
        nrow(X) x 1  : for Power-type distributions (dfam=1)
        nrow(X) x 2  : for Binomial distribution (dfam=2), column 2 is "No"
        nrow(X) x k+1: for Multinomial Logit (dfam=3), col# k+1 is baseline
    """

    # Required inputs are listed first; optional keyword arguments follow in
    # the order the caller supplied them.
    named_inputs = {'X': X, 'B': B, **kwargs}

    # The resulting operation node is created in the same context as X.
    context = X.sds_context
    return Matrix(context, 'glmPredict', named_input_nodes=named_inputs)
